1. Function definition (hidden in html)

2. Data Simulation

n <- 50 # number of trajectories per noise level
noises <- seq(0.2, 3, 0.2)
freq <- 1 # compute sinus every 1 time unit

# Phase shifted
ps <- multi_sims(type = "ps", noises = noises, n = n, freq = freq)
ps
##        Time variable      value noise
##     1:    0       V1  0.0000000     0
##     2:    1       V1  0.8414710     0
##     3:    2       V1  0.9092974     0
##     4:    3       V1  0.1411200     0
##     5:    4       V1 -0.7568025     0
##    ---                               
## 79996:   95      V50 -0.6622294     3
## 79997:   96      V50  0.2727111     3
## 79998:   97      V50  0.9569223     3
## 79999:   98      V50  0.7613435     3
## 80000:   99      V50 -0.1342110     3
# Phase shifted with trend
pst <- multi_sims(type = "pst", noises = noises, n = n, slope = 0.1, freq = freq)
# Amplitude noise
na <- multi_sims(type = "na", noises = noises, n = n, freq = freq)

plot_sim(ps)

plot_sim(pst)

plot_sim(na)

3. Distance to Mean

For each condition, this computes the distance between the average trajectory and each individual trajectory.

cond <- "noise"
ti <- "Time"
mea <- "value"
lab <- "variable"

ps_mean <- dist_mean(data = ps, condition = cond, tcol = ti, measure = mea, label = lab, return.mean = F)
ps_mean
##      noise variable euclid_to_mean
##   1:     0       V1       0.000000
##   2:     0       V2       0.000000
##   3:     0       V3       0.000000
##   4:     0       V4       0.000000
##   5:     0       V5       0.000000
##  ---                              
## 796:     3      V46       6.957651
## 797:     3      V47       6.799990
## 798:     3      V48       6.338835
## 799:     3      V49       6.452373
## 800:     3      V50       7.543223
pst_mean <- dist_mean(data = pst, condition = cond, tcol = ti, measure = mea, label = lab, return.mean = F)
na_mean <- dist_mean(data = na, condition = cond, tcol = ti, measure = mea, label = lab, return.mean = F)

4. Pairwise Distances: Overlap clipping, Correlations Pearson, Spearman, Kendall

For each condition, take each pair of trajectories and compute the correlations between them (Pearson, Spearman and Kendall). In addition it clips the trajectories by comparison with their rolling means: whenever the trajectory is above its rolling mean, set it to 1 otherwise to 0. Finally for each pair of trajectories, the “overlap” metric, represents how often the trajectories of the pair have same value (i.e. 0.5 if complete random, 1 if they fully overlap).

This takes quite a while (partially) because it is implemented with for loops, but is still performed in reasonable time (i.e. a few minutes)

ps_pw <- all_pairwise_stats(data = ps, condition = cond, label = lab, measure = mea, k_roll_mean = 5)
ps_pw
##        noise Label1 Label2 Overlap     Pearson     Spearman       Kendall
##     1:     0     V1     V2    1.00  1.00000000  1.000000000  1.0000000000
##     2:     0     V1     V3    1.00  1.00000000  1.000000000  1.0000000000
##     3:     0     V1     V4    1.00  1.00000000  1.000000000  1.0000000000
##     4:     0     V1     V5    1.00  1.00000000  1.000000000  1.0000000000
##     5:     0     V1     V6    1.00  1.00000000  1.000000000  1.0000000000
##    ---                                                                   
## 19596:     3    V47    V49    0.40 -0.23733070 -0.202256226 -0.1377777778
## 19597:     3    V47    V50    0.08 -0.97603113 -0.973021302 -0.8626262626
## 19598:     3    V48    V49    0.89  0.94887913  0.951407141  0.8161616162
## 19599:     3    V48    V50    0.41 -0.29638004 -0.275751575 -0.1834343434
## 19600:     3    V49    V50    0.52  0.02022916 -0.002508251  0.0004040404
pst_pw <- all_pairwise_stats(data = pst, condition = cond, label = lab, measure = mea, k_roll_mean = 5)
na_pw <- all_pairwise_stats(data = na, condition = cond, label = lab, measure = mea, k_roll_mean = 5)

5. Plots of the distributions (code hidden in HTML)

Euclidean distance to the mean trajectory / !!! scale in y !!!

Pairwise Overlap of clipped trajectories

Pairwise Pearson correlations

Pairwise Spearman correlations

Pairwise Kendall correlations

6. Relations between the metrics

cor(ps_pw[, 4:7])
##            Overlap   Pearson  Spearman   Kendall
## Overlap  1.0000000 0.9888383 0.9905414 0.9983633
## Pearson  0.9888383 1.0000000 0.9997731 0.9903708
## Spearman 0.9905414 0.9997731 1.0000000 0.9921283
## Kendall  0.9983633 0.9903708 0.9921283 1.0000000
cor(pst_pw[, 4:7])
##            Overlap   Pearson  Spearman   Kendall
## Overlap  1.0000000 0.9894297 0.9897065 0.9857815
## Pearson  0.9894297 1.0000000 0.9998155 0.9733336
## Spearman 0.9897065 0.9998155 1.0000000 0.9745133
## Kendall  0.9857815 0.9733336 0.9745133 1.0000000
cor(na_pw[, 4:7])
##            Overlap   Pearson  Spearman   Kendall
## Overlap  1.0000000 0.9524214 0.9535094 0.9674259
## Pearson  0.9524214 1.0000000 0.9961731 0.9808621
## Spearman 0.9535094 0.9961731 1.0000000 0.9808846
## Kendall  0.9674259 0.9808621 0.9808846 1.0000000

7. How to go from a distribution to a single value for each metrics

Distance to mean trajectory

For each condition, count the proportion of trajectories that are lying more than x standard deviation from the mean (conversion to z-score first?).

Correlations and Overlap

Get a p-value: Assume that they are normally distributed, check that the distributions lies above 0 for correlations and above 0.5 for clipping (t-test).

ps_pw[noise !=0 , .(p.val.pears = t.test(Pearson)$p.value,
                    p.val.spear = t.test(Spearman)$p.value,
                    p.val.kenda = t.test(Kendall)$p.value,
                    p.val.ovrlp = t.test(Overlap, mu = 0.5)$p.value), by = noise]
##     noise   p.val.pears   p.val.spear   p.val.kenda   p.val.ovrlp
##  1:   0.2  0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00
##  2:   0.4  0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00
##  3:   0.6  0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00
##  4:   0.8 2.474567e-138 1.049048e-136 1.034254e-134 1.055455e-130
##  5:   1.0 2.493862e-140 6.910645e-139 7.692519e-135 1.416701e-128
##  6:   1.2  5.749557e-34  2.167627e-34  7.169215e-34  3.514574e-33
##  7:   1.4  2.019084e-02  2.420775e-02  2.747831e-02  2.148950e-02
##  8:   1.6  8.869206e-01  9.078501e-01  8.690393e-01  9.146619e-01
##  9:   1.8  3.019246e-05  2.762564e-05  3.251085e-05  3.816894e-05
## 10:   2.0  9.188862e-01  9.059285e-01  7.920694e-01  7.309774e-01
## 11:   2.2  1.987143e-01  1.945878e-01  1.664291e-01  1.796073e-01
## 12:   2.4  6.747538e-01  6.901232e-01  6.969219e-01  7.264563e-01
## 13:   2.6  4.175480e-01  4.123208e-01  4.313395e-01  4.549496e-01
## 14:   2.8  3.083033e-01  3.167636e-01  3.653296e-01  3.565258e-01
## 15:   3.0  6.988920e-01  7.066568e-01  6.593373e-01  6.280499e-01
pst_pw[noise !=0 , .(p.val.pears = t.test(Pearson)$p.value,
                    p.val.spear = t.test(Spearman)$p.value,
                    p.val.kenda = t.test(Kendall)$p.value,
                    p.val.ovrlp = t.test(Overlap, mu = 0.5)$p.value), by = noise]
##     noise p.val.pears p.val.spear p.val.kenda   p.val.ovrlp
##  1:   0.2           0           0           0  0.000000e+00
##  2:   0.4           0           0           0  0.000000e+00
##  3:   0.6           0           0           0  0.000000e+00
##  4:   0.8           0           0           0 1.351011e-250
##  5:   1.0           0           0           0  3.070666e-66
##  6:   1.2           0           0           0  1.327521e-47
##  7:   1.4           0           0           0  5.823719e-05
##  8:   1.6           0           0           0  1.035992e-04
##  9:   1.8           0           0           0  1.546523e-01
## 10:   2.0           0           0           0  6.632032e-01
## 11:   2.2           0           0           0  9.954057e-09
## 12:   2.4           0           0           0  2.961142e-01
## 13:   2.6           0           0           0  6.034313e-01
## 14:   2.8           0           0           0  4.142044e-01
## 15:   3.0           0           0           0  2.985493e-01
na_pw[noise !=0 , .(p.val.pears = t.test(Pearson)$p.value,
                    p.val.spear = t.test(Spearman)$p.value,
                    p.val.kenda = t.test(Kendall)$p.value,
                    p.val.ovrlp = t.test(Overlap, mu = 0.5)$p.value), by = noise]
##     noise   p.val.pears   p.val.spear   p.val.kenda   p.val.ovrlp
##  1:   0.2  0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00
##  2:   0.4  0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00
##  3:   0.6  0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00
##  4:   0.8  0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00
##  5:   1.0  0.000000e+00  0.000000e+00  0.000000e+00  0.000000e+00
##  6:   1.2  0.000000e+00  0.000000e+00  0.000000e+00 8.474046e-294
##  7:   1.4  0.000000e+00  0.000000e+00  0.000000e+00 4.348222e-187
##  8:   1.6  0.000000e+00  0.000000e+00  0.000000e+00 4.190186e-131
##  9:   1.8 1.587742e-261 1.777216e-257 4.372247e-255  3.720415e-77
## 10:   2.0 3.411216e-250 7.318420e-232 7.017019e-231  4.345561e-68
## 11:   2.2 6.071381e-144 6.366061e-137 3.982633e-136  3.223184e-33
## 12:   2.4 4.035454e-116 2.781478e-104 8.909083e-103  8.435194e-24
## 13:   2.6 2.904902e-112 7.045786e-102 3.235532e-101  8.840405e-16
## 14:   2.8  2.698114e-73  6.407591e-72  5.305944e-72  3.122165e-18
## 15:   3.0  2.820193e-85  3.114461e-79  8.704064e-79  9.873313e-20

8. Effect of window size in rolling mean for clipped trajectories

If we increase sampling rate, it means that there is more points per oscillation. Visually this means that the average trajectory is smoother.

plot_sim(sim_phase_shifted(n = 50, noise = 0.4, freq = 2), use.facet = F)

plot_sim(sim_phase_shifted(n = 50, noise = 0.4, freq = 0.2), use.facet = F)

The role of the rolling mean in clipping, is to “cut the oscillation in 2”. A good case is like:

# Contains 6 to 7 points per oscillation
sim <- sim_phase_shifted(n = 1, noise = 0, freq = 1)
plot(sim$Time, sim$value, type = "b")
lines(x=sim$Time, y=rollex(sim$value, k = 6), col = "red", lwd = 1.5)

# clip trajectory and scale for clarity on the plot
clip_traj <- wrap_clip(x = sim$value, k = 6)
clip_traj <- ifelse(clip_traj == 0, -1, 1)
lines(x=sim$Time, y=clip_traj, col = "blue", type = "S")

It is a good case since the rolling mean (in red), cuts the trajectory (in black) at half-period (i.e. near to 0). This results in a clipped trajectory (in blue) that is up when the oscillation is “above 0” and down when its “below 0”.

Now we multiply the sampling rate by 5, but keep the same window size for the moving average in the top case, and multiply it also by 5 in the bottom case:

par(mfrow = c(2,1))
# Contains more than 35 points per oscillation
sim <- sim_phase_shifted(n = 1, noise = 0, freq = 0.2)

size_window <- 6
plot(sim$Time, sim$value, type = "b")
lines(x = sim$Time, y = rollex(sim$value, k = size_window), col = "red", lwd = 1.5)
clip_traj <- wrap_clip(x = sim$value, k = size_window)
clip_traj <- ifelse(clip_traj == 0, -1, 1)
lines(x = sim$Time, y = clip_traj, col = "blue", type = "S")

size_window <- 35
plot(sim$Time, sim$value, type = "b")
lines(x = sim$Time, y = rollex(sim$value, k = size_window), col = "red", lwd = 1.5)
clip_traj <- wrap_clip(x = sim$value, k = size_window)
clip_traj <- ifelse(clip_traj == 0, -1, 1)
lines(x = sim$Time, y = clip_traj, col = "blue", type = "S")

As we can see the moving average follows the signal perfectly with the small window, whereas it is out of phase with the larger window with much smaller amplitude. Clipping trajectory is ok in both cases. Nevertheless the top case is bad, why? Don’t forget that we’ve been working with perfect sinusoid so far, let’s see what happens with a tiny bit of noise in the amplitude of the signal:

par(mfrow = c(2,1))
# Contains more than 35 points per oscillation
sim <- sim_noisy_amplitude(n = 1, noise = 0.1, freq = 0.2)

size_window <- 6
plot(sim$Time, sim$value, type = "b")
lines(x = sim$Time, y = rollex(sim$value, k = size_window), col = "red", lwd = 1.5)
clip_traj <- wrap_clip(x = sim$value, k = size_window)
clip_traj <- ifelse(clip_traj == 0, -1, 1)
lines(x = sim$Time, y = clip_traj, col = "blue", type = "S")

size_window <- 35
plot(sim$Time, sim$value, type = "b")
lines(x = sim$Time, y = rollex(sim$value, k = size_window), col = "red", lwd = 1.5)
clip_traj <- wrap_clip(x = sim$value, k = size_window)
clip_traj <- ifelse(clip_traj == 0, -1, 1)
lines(x = sim$Time, y = clip_traj, col = "blue", type = "S")

As we observe, clipped trajectory becomes completely random with the small window, whereas it remains identical with the larger one. So we should choose the window size for the rolling mean such that it covers the number of points in an oscillation.

On multiple simulations the distributions look like:

# Already defined in section 4.
# na <- multi_sims(type = "na", noises = noises, n = n, freq = 1)
# na_pw <- all_pairwise_stats(data = na, condition = cond, label = lab, measure = mea, k_roll_mean = 5)
# q3 <- ggplot(na_pw, aes(x = noise, y = Overlap)) + geom_boxplot(aes(group = noise)) + ggtitle("Noisy Amplitude") + scale_y_continuous(limits = c(0,1))

na_high_freq <- multi_sims(type = "na", noises = noises, n = n, freq = 0.2)
na_high_freq_pw <- all_pairwise_stats(data = na_high_freq, condition = cond, label = lab, measure = mea, k_roll_mean = 5)
q3_high <- ggplot(na_high_freq_pw, aes(x = noise, y = Overlap)) + geom_boxplot(aes(group = noise)) + ggtitle("Window Size: 5; Freq: 0.2 (window too small)") + scale_y_continuous(limits = c(0,1))
grid.arrange(q3 + ggtitle("Window Size: 5; Freq: 1 (window of the right size)"), q3_high, ncol = 2)